# load required libraries
library(tidyverse)
── Attaching packages ──────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.0.0     ✔ purrr   0.2.5
✔ tibble  1.4.2     ✔ dplyr   0.7.6
✔ tidyr   0.8.1     ✔ stringr 1.3.1
✔ readr   1.1.1     ✔ forcats 0.3.0
── Conflicts ─────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(langcog) # source: https://github.com/langcog/langcog-package

Attaching package: ‘langcog’

The following object is masked from ‘package:base’:

    scale
library(psych)

Attaching package: ‘psych’

The following objects are masked from ‘package:ggplot2’:

    %+%, alpha
library(lme4)
Loading required package: Matrix

Attaching package: ‘Matrix’

The following object is masked from ‘package:tidyr’:

    expand
library(cowplot)

Attaching package: ‘cowplot’

The following object is masked from ‘package:ggplot2’:

    ggsave
# set theme for ggplots
theme_set(theme_bw())

Data preparation

d_all <- # d_us_ad_pilot %>% rownames_to_column("subid") %>%
  d_gh_ad %>% rownames_to_column("subid") %>%
  full_join(d_gh_ch %>% rownames_to_column("subid")) %>%
  full_join(d_th_ad %>% rownames_to_column("subid")) %>%
  full_join(d_th_ch %>% rownames_to_column("subid")) %>%
  full_join(d_vt_ad %>% rownames_to_column("subid")) %>%
  full_join(d_vt_ch %>% rownames_to_column("subid")) %>%
  column_to_rownames("subid")
Joining, by = c("subid", "choose what to do", "feel guilty", "feel happy", "feel love", "feel pain", "feel proud", "feel sad", "feel scared", "feel shy", "feel sick, like when you feel like you might vomit", "feel tired", "figure out how to do things", "get angry", "get hungry", "get hurt feelings", "hear things", "pray", "remember things", "sense temperatures", "sense when things are far away", "smell things", "think about things")
Joining, by = c("subid", "choose what to do", "feel guilty", "feel happy", "feel love", "feel pain", "feel proud", "feel sad", "feel scared", "feel shy", "feel sick, like when you feel like you might vomit", "feel tired", "figure out how to do things", "get angry", "get hungry", "get hurt feelings", "hear things", "pray", "remember things", "sense temperatures", "sense when things are far away", "smell things", "think about things")
Joining, by = c("subid", "choose what to do", "feel guilty", "feel happy", "feel love", "feel pain", "feel proud", "feel sad", "feel scared", "feel shy", "feel sick, like when you feel like you might vomit", "feel tired", "figure out how to do things", "get angry", "get hungry", "get hurt feelings", "hear things", "pray", "remember things", "sense temperatures", "sense when things are far away", "smell things", "think about things")
Joining, by = c("subid", "choose what to do", "feel guilty", "feel happy", "feel love", "feel pain", "feel proud", "feel sad", "feel scared", "feel shy", "feel sick, like when you feel like you might vomit", "feel tired", "figure out how to do things", "get angry", "get hungry", "get hurt feelings", "hear things", "pray", "remember things", "sense temperatures", "sense when things are far away", "smell things", "think about things")
Joining, by = c("subid", "choose what to do", "feel guilty", "feel happy", "feel love", "feel pain", "feel proud", "feel sad", "feel scared", "feel shy", "feel sick, like when you feel like you might vomit", "feel tired", "figure out how to do things", "get angry", "get hungry", "get hurt feelings", "hear things", "pray", "remember things", "sense temperatures", "sense when things are far away", "smell things", "think about things")

Shared conceptual structure

Pooling all participants from all sites together into a common factor structure.

Parallel analysis

How many factors to retain?

reten_all_PA <- fa.parallel(d_all, plot = F); reten_all_PA
Parallel analysis suggests that the number of factors =  4  and the number of components =  2 
Call: fa.parallel(x = d_all, plot = F)
Parallel analysis suggests that the number of factors =  4  and the number of components =  2 

 Eigen Values of 
  Original factors Simulated data Original components simulated data
1             8.94           0.35                9.52           1.27
2             1.46           0.25                2.08           1.23
3             0.50           0.21                1.11           1.20
4             0.25           0.18                0.86           1.17
reten_all_par <- reten_all_PA$nfact

What are these factors?

efa_all_par <- fa(d_all, nfactors = reten_all_par, rotate = "oblimin",
                  scores = "tenBerge", impute = "median")
Loading required namespace: GPArotation
heatmap_fun(efa_all_par) + 
  labs(title = "Parallel Analysis")
Joining, by = "capacity"
Joining, by = "factor"

Which capacities are attributed to which targets?

scoresplot_fun(efa_all_par, target = "all") + 
  labs(title = "Parallel Analysis")

|======================================                 | 70% ~1 s remaining     
|========================================               | 73% ~1 s remaining     
|=========================================              | 76% ~1 s remaining     
|===========================================            | 78% ~1 s remaining     
|============================================           | 82% ~0 s remaining     
|==============================================         | 85% ~0 s remaining     
|================================================       | 89% ~0 s remaining     
|==================================================     | 92% ~0 s remaining     
|====================================================   | 95% ~0 s remaining     
|====================================================== | 99% ~0 s remaining     
Ignoring unknown aesthetics: y

scoresplot_fun(efa_all_par, target = c("ghosts", "God", "children")) + 
  labs(title = "Parallel Analysis")
the condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be usedIgnoring unknown aesthetics: y

itemsplot_fun(efa_all_par, target = c("ghosts", "God", "children")) +
  labs(title = "Parallel Analysis")
the condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be usedJoining, by = "capacity"

|=============================                          | 53% ~2 s remaining     
|==============================                         | 55% ~2 s remaining     
|===============================                        | 57% ~2 s remaining     
|================================                       | 59% ~2 s remaining     
|=================================                      | 60% ~1 s remaining     
|==================================                     | 62% ~1 s remaining     
|==================================                     | 63% ~1 s remaining     
|====================================                   | 66% ~1 s remaining     
|=====================================                  | 67% ~1 s remaining     
|=====================================                  | 69% ~1 s remaining     
|=======================================                | 71% ~1 s remaining     
|=======================================                | 72% ~1 s remaining     
|========================================               | 74% ~1 s remaining     
|=========================================              | 76% ~1 s remaining     
|==========================================             | 77% ~1 s remaining     
|===========================================            | 79% ~1 s remaining     
|===========================================            | 80% ~1 s remaining     
|============================================           | 81% ~1 s remaining     
|=============================================          | 83% ~1 s remaining     
|==============================================         | 85% ~1 s remaining     
|===============================================        | 87% ~0 s remaining     
|================================================       | 89% ~0 s remaining     
|=================================================      | 91% ~0 s remaining     
|===================================================    | 93% ~0 s remaining     
|====================================================   | 95% ~0 s remaining     
|=====================================================  | 97% ~0 s remaining     
|====================================================== | 99% ~0 s remaining     
Joining, by = c("factor", "capacity", "order")

Minimizing BIC

How many factors to retain?

reten_all_BIC <- VSS(d_all, plot = F); reten_all_BIC

Very Simple Structure
Call: vss(x = x, n = n, rotate = rotate, diagonal = diagonal, fm = fm, 
    n.obs = n.obs, plot = plot, title = title, use = use, cor = cor)
VSS complexity 1 achieves a maximimum of 0.89  with  1  factors
VSS complexity 2 achieves a maximimum of 0.93  with  2  factors

The Velicer MAP achieves a minimum of 0.01  with  2  factors 
BIC achieves a minimum of  -613.26  with  4  factors
Sample Size adjusted BIC achieves a minimum of  -157.21  with  5  factors

Statistics by number of factors 
  vss1 vss2   map dof chisq    prob sqresid  fit RMSEA  BIC SABIC complex eChisq
1 0.89 0.00 0.022 209  2284 0.0e+00    10.9 0.89 0.104  855  1519     1.0   2775
2 0.57 0.93 0.010 188   874 7.3e-89     6.7 0.93 0.063 -412   185     1.5    580
3 0.45 0.82 0.011 168   550 4.5e-42     5.7 0.94 0.050 -599   -66     1.9    281
4 0.45 0.82 0.014 149   406 1.2e-25     5.2 0.95 0.043 -613  -140     2.0    178
5 0.42 0.74 0.018 131   323 3.4e-18     4.9 0.95 0.040 -573  -157     2.2    138
6 0.44 0.72 0.022 114   260 1.8e-13     4.6 0.95 0.038 -519  -157     2.3    103
7 0.41 0.71 0.028  98   212 2.2e-10     4.3 0.96 0.036 -458  -147     2.4     79
8 0.41 0.69 0.033  83   159 1.1e-06     4.1 0.96 0.032 -409  -145     2.5     56
   SRMR eCRMS eBIC
1 0.080 0.084 1346
2 0.037 0.041 -706
3 0.026 0.030 -868
4 0.020 0.025 -841
5 0.018 0.024 -758
6 0.015 0.022 -677
7 0.014 0.021 -591
8 0.011 0.019 -512
reten_all_bic <- data.frame(reten_all_BIC$vss.stats %>% rownames_to_column("nfact") %>% top_n(-1, BIC))$nfact %>% as.numeric()

What are these factors?

efa_all_bic <- fa(d_all, nfactors = reten_all_bic, rotate = "oblimin",
                  scores = "tenBerge", impute = "median")
heatmap_fun(efa_all_bic, 
            factor_names = c("COGNITIVE", "BODILY", "SOCIAL-EMOTIONAL", "other")) + 
  # labs(title = "Minimizing BIC")
  labs(x = "Figure 1: Shared conceptual structure") +
  theme(axis.title.x = element_text(hjust = 0)) +
  guides(fill = guide_colorbar("factor loading", barheight = 15))
the condition has length > 1 and only the first element will be usedJoining, by = "capacity"
Joining, by = "factor"

Which capacities are attributed to which targets?

scoresplot_fun(efa_all_bic, target = "all", highlight = "supernatural",
            factor_names = c("COGNITIVE", "BODILY", "SOCIAL-EMOTIONAL", "other")) + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1,
                                   color = c(rep("black", 8),
                                             rep("#984ea3", 2)),
                                   face = c(rep("plain", 8),
                                            rep("bold", 2)))) +
  scale_fill_manual(values = c("#e41a1c", "#4daf4a", "#377eb8", "gray")) +
  # labs(title = "Minimizing BIC")
  labs(title = "Figure 2: Factor scores")
the condition has length > 1 and only the first element will be usedIgnoring unknown aesthetics: yScale for 'fill' is already present. Adding another scale for 'fill', which
will replace the existing scale.

scoresplot_fun(efa_all_bic, target = c("ghosts", "God", "children")) + 
  # scale_fill_manual(values = c("#e41a1c", "#4daf4a", "#377eb8")) +
  labs(title = "Minimizing BIC")
the condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be usedIgnoring unknown aesthetics: y

itemsplot_fun(efa_all_bic, target = c("ghosts", "God", "children")) + 
  labs(title = "Minimizing BIC")
the condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be usedJoining, by = "capacity"

|=============================================          | 83% ~0 s remaining     
|==============================================         | 85% ~0 s remaining     
|===============================================        | 86% ~0 s remaining     
|================================================       | 89% ~0 s remaining     
|=================================================      | 91% ~0 s remaining     
|==================================================     | 93% ~0 s remaining     
|====================================================   | 95% ~0 s remaining     
|=====================================================  | 97% ~0 s remaining     
|====================================================== | 99% ~0 s remaining     
Joining, by = c("factor", "capacity", "order")

Preset criteria

How many factors to retain?

reten_all_k <- reten_fun(d_all, rot_type = "oblimin"); reten_all_k
[1] 2

What are these factors?

efa_all_k <- fa(d_all, nfactors = reten_all_k, rotate = "oblimin",
                  scores = "tenBerge", impute = "median")
heatmap_fun(efa_all_k) + 
  labs(title = "Preset criteria (Weisman et al., 2017)")
Joining, by = "capacity"
Joining, by = "factor"

Which capacities are attributed to which targets?

scoresplot_fun(efa_all_k, target = "all") + 
  labs(title = "Preset criteria (Weisman et al., 2017)")
Ignoring unknown aesthetics: y

scoresplot_fun(efa_all_k, target = c("ghosts", "God", "children")) + 
  labs(title = "Preset criteria (Weisman et al., 2017)")
the condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be usedIgnoring unknown aesthetics: y

itemsplot_fun(efa_all_k, target = c("ghosts", "God", "children")) + 
  labs(title = "Preset criteria (Weisman et al., 2017)")
the condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be usedJoining, by = "capacity"

|==========================================             | 78% ~1 s remaining     
|===========================================            | 80% ~1 s remaining     
|============================================           | 81% ~0 s remaining     
|=============================================          | 83% ~0 s remaining     
|=============================================          | 84% ~0 s remaining     
|==============================================         | 85% ~0 s remaining     
|===============================================        | 87% ~0 s remaining     
|================================================       | 89% ~0 s remaining     
|=================================================      | 90% ~0 s remaining     
|==================================================     | 92% ~0 s remaining     
|===================================================    | 94% ~0 s remaining     
|====================================================   | 95% ~0 s remaining     
|=====================================================  | 97% ~0 s remaining     
|====================================================== | 99% ~0 s remaining     
Joining, by = c("factor", "capacity", "order")

3 factors

What are these factors?

efa_all_3 <- fa(d_all, nfactors = 3, rotate = "oblimin",
                scores = "tenBerge", impute = "median")
heatmap_fun(efa_all_3, 
            factor_names = c("COGNITIVE", "BODILY", "SOCIAL-EMOTIONAL")) + 
  labs(x = "Shared concpetual structure") +
  theme(axis.title.x = element_text(hjust = 0))
the condition has length > 1 and only the first element will be usedJoining, by = "capacity"
Joining, by = "factor"

  # labs(title = "Preset criteria (Weisman et al., 2017)")

Which capacities are attributed to which targets?

scoresplot_fun(efa_all_3, target = "all", highlight = "supernatural",
               factor_names = c("COGNITIVE", "BODILY", "SOCIAL-EMOTIONAL")) +
  scale_fill_manual(values = c("#e41a1c", "#4daf4a", "#377eb8")) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1,
                                   color = c(rep("black", 8),
                                             rep("#984ea3", 2)),
                                   face = c(rep("plain", 8),
                                            rep("bold", 2))))
the condition has length > 1 and only the first element will be usedIgnoring unknown aesthetics: yScale for 'fill' is already present. Adding another scale for 'fill', which
will replace the existing scale.

scoresplot_fun(efa_all_3, target = c("ghosts", "God", "children")) + 
  labs(title = "Preset criteria (Weisman et al., 2017)")
the condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be usedIgnoring unknown aesthetics: y

itemsplot_fun(efa_all_3, target = c("ghosts", "God", "children")) + 
  labs(title = "Preset criteria (Weisman et al., 2017)")
the condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be usedJoining, by = "capacity"

|========================================               | 74% ~1 s remaining     
|=========================================              | 76% ~1 s remaining     
|===========================================            | 78% ~1 s remaining     
|============================================           | 80% ~1 s remaining     
|=============================================          | 82% ~0 s remaining     
|==============================================         | 84% ~0 s remaining     
|===============================================        | 86% ~0 s remaining     
|================================================       | 88% ~0 s remaining     
|=================================================      | 90% ~0 s remaining     
|==================================================     | 92% ~0 s remaining     
|===================================================    | 94% ~0 s remaining     
|====================================================   | 96% ~0 s remaining     
|====================================================== | 98% ~0 s remaining     
Joining, by = c("factor", "capacity", "order")

Comparing conceptual structures

We’ll extract 4 factors from all samples, to keep things simple.

efa_us_ad_4 <- fa(d_us_ad_pilot, nfactors = 4, rotate = "oblimin")
efa_gh_ad_4 <- fa(d_gh_ad, nfactors = 4, rotate = "oblimin")
efa_gh_ch_4 <- fa(d_gh_ch, nfactors = 4, rotate = "oblimin")
efa_th_ad_4 <- fa(d_th_ad, nfactors = 4, rotate = "oblimin")
efa_th_ch_4 <- fa(d_th_ch, nfactors = 4, rotate = "oblimin")
efa_vt_ad_4 <- fa(d_vt_ad, nfactors = 4, rotate = "oblimin")
efa_vt_ch_4 <- fa(d_vt_ch, nfactors = 4, rotate = "oblimin")
plot_us_ad_4 <- heatmap_fun(efa_us_ad_4) + 
  guides(fill = "none") + 
  labs(x = "US: adults") +
  theme(axis.title.x = element_text(hjust = 0))
Joining, by = "capacity"
Joining, by = "factor"
plot_gh_ad_4 <- heatmap_fun(efa_gh_ad_4) + 
  guides(fill = "none") + 
  labs(x = "Ghana: adults") +
  theme(axis.title.x = element_text(hjust = 0))
Joining, by = "capacity"
Joining, by = "factor"
plot_gh_ch_4 <- heatmap_fun(efa_gh_ch_4) + 
  guides(fill = "none") + 
  labs(x = "Ghana: children") +
  theme(axis.title.x = element_text(hjust = 0))
Joining, by = "capacity"
Joining, by = "factor"
plot_th_ad_4 <- heatmap_fun(efa_th_ad_4) + 
  guides(fill = "none") + 
  labs(x = "Thailand: adults") +
  theme(axis.title.x = element_text(hjust = 0))
Joining, by = "capacity"
Joining, by = "factor"
plot_th_ch_4 <- heatmap_fun(efa_th_ch_4) + 
  guides(fill = "none") + 
  labs(x = "Thailand: children") +
  theme(axis.title.x = element_text(hjust = 0))
Joining, by = "capacity"
Joining, by = "factor"
plot_vt_ad_4 <- heatmap_fun(efa_vt_ad_4) + 
  guides(fill = "none") + 
  labs(x = "Vanuatu: adults") +
  theme(axis.title.x = element_text(hjust = 0))
Joining, by = "capacity"
Joining, by = "factor"
plot_vt_ch_4 <- heatmap_fun(efa_vt_ch_4) + 
  guides(fill = "none") + 
  labs(x = "Vanuatu: children") +
  theme(axis.title.x = element_text(hjust = 0))
Joining, by = "capacity"
Joining, by = "factor"
plot_grid(#plot_us_ad_4, 
          plot_gh_ad_4, plot_gh_ch_4, plot_th_ad_4, 
          plot_th_ch_4, plot_vt_ad_4, plot_vt_ch_4, 
          ncol = 2, labels = c("A", "B", "C", "D", "E", "F", "G"))

Counts

paste("US adults:", nrow(d_us_ad_pilot))
[1] "US adults: 131"
paste("GH adults:", nrow(d_gh_ad))
[1] "GH adults: 150"
paste("GH children:", nrow(d_gh_ch))
[1] "GH children: 149"
paste("TH adults:", nrow(d_th_ad))
[1] "TH adults: 150"
paste("TH children:", nrow(d_th_ch))
[1] "TH children: 152"
paste("VT adults:", nrow(d_vt_ad))
[1] "VT adults: 164"
paste("VT children:", nrow(d_vt_ch))
[1] "VT children: 169"
paste("Non-US:", sum(nrow(d_gh_ad), nrow(d_gh_ch),
                     nrow(d_th_ad), nrow(d_th_ch),
                     nrow(d_vt_ad), nrow(d_vt_ch)))
[1] "Non-US: 934"
---
title: "SRCD 2019 Symposium: Religious & metaphysical concepts (Srinivasan)"
output: 
  html_notebook:
    toc: true
    toc_float: true
---

```{r global_options, include = F}
knitr::opts_chunk$set(fig.width = 3, fig.asp = 0.67)
```

```{r}
# load required libraries
library(tidyverse)
library(langcog) # source: https://github.com/langcog/langcog-package
library(psych)
library(lme4)
library(cowplot)

# set theme for ggplots
theme_set(theme_bw())
```

```{r, include = F}
source("./scripts/max_factors_efa.R")
source("./scripts/plot_fun_beetles.R")
source("./scripts/reten_fun.R")
source("./scripts/clean_fun.R")
```

# Data preparation

```{r, include = F}
question_key <- read.csv("/Users/kweisman/Documents/Research (Stanford)/Projects/Templeton Grant/DEVELOPMENTAL TASKS/beetles:dimkid:factors/design/beetles cb.csv")
```

```{r, include = F, warning = FALSE}
# US adults PILOT
d_us_ad_pilot_raw <- read.csv("/Users/kweisman/Documents/Research (Stanford)/Projects/Templeton Grant/DEVELOPMENTAL TASKS/beetles:dimkid:factors/analysis/_US pilot/beetles_pilot2_tidy.csv")
d_us_ad_pilot <- d_us_ad_pilot_raw %>%
  filter(scale == "beetles") %>%
  distinct(subid, character, question, response) %>%
  filter(!question %in% c("bleed", "mind", "soul")) %>%
  mutate(question = recode(question,
                           "add_subtract" = "add and subtract numbers",
                           "angry" = "get angry",
                           # "bleed" = "bleed when they touch something sharp",
                           "choose" = "choose what to do",
                           "figure_out" = "figure out how to do things",
                           "guilty" = "feel guilty",
                           "happy" = "feel happy",
                           "hear" = "hear things",
                           "hungry" = "get hungry",
                           "hurt_feelings" = "get hurt feelings",
                           "love" = "feel love",
                           # "mind" = "have minds",
                           "pain" = "feel pain",
                           "pray" = "pray", 
                           "proud" = "feel proud",
                           "remember" = "remember things",
                           "sad" = "feel sad",
                           "scared" = "feel scared",
                           "sense_far" = "sense when things are far away",
                           "sense_temp" = "sense temperatures",
                           "shy" = "feel shy",
                           "sick" = "feel sick, like when you feel like you might vomit",
                           "smell" = "smell things",
                           # "soul" = "have souls",
                           "think" = "think about things",
                           "tired" = "feel tired")) %>%
  spread(question, response) %>%
  select(-subid) %>%
  mutate(subid = paste("us_ad",
                       10001:(10000+length(levels(factor(d_us_ad_pilot_raw$subid)))),
                       "target",
                       character,
                       sep = "_")) %>%
  column_to_rownames("subid") %>%
  select(-`add and subtract numbers`, -character)
```

```{r, include = F, warning = FALSE}
## US adults: NOT YET RUN
## US children: NOT YET RUN

## GH adults: NOT YET RUN
d_gh_ad <- read.csv("/Users/kweisman/Desktop/TEMP/TEMP_GHANA_2018/beetles_ghana_adults_tidy_2018-08-12.csv") %>% clean_fun(key = question_key, ex_addsub = T, site = "gh", age = "ad")
## GH children
d_gh_ch <- read.csv("/Users/kweisman/Desktop/TEMP/TEMP_GHANA/beetles_ghana_tidy_2017-07-12.csv") %>% clean_fun(key = question_key, ex_addsub = T, site = "gh", age = "ch")
d_gh_ch_fante <- read.csv("/Users/kweisman/Desktop/TEMP/TEMP_GHANA_2018/beetles_ghana_fante_children_tidy_2018-07-19.csv")[-1] %>% 
  rename(subnum = subid) %>% 
  filter(grepl("fante", tolower(language_home)) | grepl("twi", tolower(language_home))) %>%
  clean_fun(key = question_key, ex_addsub = T, site = "gh", age = "ch")

## CH adults: NOT YET RUN
## CH children: NOT YET RUN

## TH adults
d_th_ad <- read.csv("/Users/kweisman/Desktop/TEMP/TEMP_THAILAND/beetles_thailand_adults_tidy_2018-05-09.csv") %>% clean_fun(key = question_key, ex_addsub = T, site = "th", age = "ad")
## TH children
d_th_ch <- read.csv("/Users/kweisman/Desktop/TEMP/TEMP_THAILAND/beetles_thailand_children_tidy_2018-05-09.csv") %>% clean_fun(key = question_key, ex_addsub = T, site = "th", age = "ch")

## VT adults
d_vt_ad <- read.csv("/Users/kweisman/Desktop/TEMP/TEMP_VANUATU/beetles_vanuatu_adults_tidy_2018-05-09.csv") %>% clean_fun(key = question_key, ex_addsub = T, site = "vt", age = "ad")
## VT children
d_vt_ch <- read.csv("/Users/kweisman/Desktop/TEMP/TEMP_VANUATU/beetles_vanuatu_children_tidy_2018-05-09.csv") %>% clean_fun(key = question_key, ex_addsub = T, site = "vt", age = "ch")
```

```{r}
d_all <- # d_us_ad_pilot %>% rownames_to_column("subid") %>%
  d_gh_ad %>% rownames_to_column("subid") %>%
  full_join(d_gh_ch %>% rownames_to_column("subid")) %>%
  full_join(d_th_ad %>% rownames_to_column("subid")) %>%
  full_join(d_th_ch %>% rownames_to_column("subid")) %>%
  full_join(d_vt_ad %>% rownames_to_column("subid")) %>%
  full_join(d_vt_ch %>% rownames_to_column("subid")) %>%
  column_to_rownames("subid")
```

# Shared conceptual structure

Pooling all participants from all sites together into a common factor structure.

## Parallel analysis

### How many factors to retain?

```{r}
reten_all_PA <- fa.parallel(d_all, plot = F); reten_all_PA
reten_all_par <- reten_all_PA$nfact
```

### What are these factors?

```{r}
efa_all_par <- fa(d_all, nfactors = reten_all_par, rotate = "oblimin",
                  scores = "tenBerge", impute = "median")
```

```{r, fig.width = 3, fig.asp = 2}
heatmap_fun(efa_all_par) + 
  labs(title = "Parallel Analysis")
```

### Which capacities are attributed to which targets?

```{r, fig.width = 6, fig.asp = 0.8}
scoresplot_fun(efa_all_par, target = "all") + 
  labs(title = "Parallel Analysis")
```

```{r, fig.width = 3, fig.asp = 1.5}
scoresplot_fun(efa_all_par, target = c("ghosts", "God", "children")) + 
  labs(title = "Parallel Analysis")
```

```{r, fig.width = 5, fig.asp = 1}
itemsplot_fun(efa_all_par, target = c("ghosts", "God", "children")) +
  labs(title = "Parallel Analysis")
```


## Minimizing BIC

### How many factors to retain?

```{r}
reten_all_BIC <- VSS(d_all, plot = F); reten_all_BIC
reten_all_bic <- data.frame(reten_all_BIC$vss.stats %>% rownames_to_column("nfact") %>% top_n(-1, BIC))$nfact %>% as.numeric()
```

### What are these factors?

```{r}
efa_all_bic <- fa(d_all, nfactors = reten_all_bic, rotate = "oblimin",
                  scores = "tenBerge", impute = "median")
```

```{r, fig.width = 4.5}
heatmap_fun(efa_all_bic, 
            factor_names = c("COGNITIVE", "BODILY", "SOCIAL-EMOTIONAL", "other")) + 
  # labs(title = "Minimizing BIC")
  labs(x = "Figure 1: Shared conceptual structure") +
  theme(axis.title.x = element_text(hjust = 0)) +
  guides(fill = guide_colorbar("factor loading", barheight = 15))
```

### Which capacities are attributed to which targets?

```{r, fig.width = 5, fig.asp = 1}
scoresplot_fun(efa_all_bic, target = "all", highlight = "supernatural",
            factor_names = c("COGNITIVE", "BODILY", "SOCIAL-EMOTIONAL", "other")) + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1,
                                   color = c(rep("black", 8),
                                             rep("#984ea3", 2)),
                                   face = c(rep("plain", 8),
                                            rep("bold", 2)))) +
  scale_fill_manual(values = c("#e41a1c", "#4daf4a", "#377eb8", "gray")) +
  # labs(title = "Minimizing BIC")
  labs(title = "Figure 2: Factor scores")
```

```{r, fig.width = 3.5, fig.asp = 1}
scoresplot_fun(efa_all_bic, target = c("ghosts", "God", "children")) + 
  # scale_fill_manual(values = c("#e41a1c", "#4daf4a", "#377eb8")) +
  labs(title = "Minimizing BIC")
```

```{r, fig.width = 5, fig.asp = 1}
itemsplot_fun(efa_all_bic, target = c("ghosts", "God", "children")) + 
  labs(title = "Minimizing BIC")
```



## Preset criteria

### How many factors to retain?

```{r}
reten_all_k <- reten_fun(d_all, rot_type = "oblimin"); reten_all_k
```

### What are these factors?

```{r}
efa_all_k <- fa(d_all, nfactors = reten_all_k, rotate = "oblimin",
                  scores = "tenBerge", impute = "median")
```

```{r, fig.width = 3, fig.asp = 2}
heatmap_fun(efa_all_k) + 
  labs(title = "Preset criteria (Weisman et al., 2017)")
```

### Which capacities are attributed to which targets?

```{r, fig.width = 6, fig.asp = 0.8}
scoresplot_fun(efa_all_k, target = "all") + 
  labs(title = "Preset criteria (Weisman et al., 2017)")
```

```{r, fig.width = 3, fig.asp = 1.5}
scoresplot_fun(efa_all_k, target = c("ghosts", "God", "children")) + 
  labs(title = "Preset criteria (Weisman et al., 2017)")
```

```{r, fig.width = 5, fig.asp = 1}
itemsplot_fun(efa_all_k, target = c("ghosts", "God", "children")) + 
  labs(title = "Preset criteria (Weisman et al., 2017)")
```


## 3 factors

### What are these factors?

```{r}
efa_all_3 <- fa(d_all, nfactors = 3, rotate = "oblimin",
                scores = "tenBerge", impute = "median")
```

```{r, fig.width = 4, fig.asp = 0.6}
heatmap_fun(efa_all_3, 
            factor_names = c("COGNITIVE", "BODILY", "SOCIAL-EMOTIONAL")) + 
  labs(x = "Shared concpetual structure") +
  theme(axis.title.x = element_text(hjust = 0))
  # labs(title = "Preset criteria (Weisman et al., 2017)")
```

### Which capacities are attributed to which targets?

```{r, fig.width = 6, fig.asp = 0.67}
scoresplot_fun(efa_all_3, target = "all", highlight = "supernatural",
               factor_names = c("COGNITIVE", "BODILY", "SOCIAL-EMOTIONAL")) +
  scale_fill_manual(values = c("#e41a1c", "#4daf4a", "#377eb8")) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1,
                                   color = c(rep("black", 8),
                                             rep("#984ea3", 2)),
                                   face = c(rep("plain", 8),
                                            rep("bold", 2))))
```

```{r, fig.width = 3, fig.asp = 1.5}
scoresplot_fun(efa_all_3, target = c("ghosts", "God", "children")) + 
  labs(title = "Preset criteria (Weisman et al., 2017)")
```

```{r, fig.width = 5, fig.asp = 1}
itemsplot_fun(efa_all_3, target = c("ghosts", "God", "children")) + 
  labs(title = "Preset criteria (Weisman et al., 2017)")
```


# Comparing conceptual structures

We'll extract 4 factors from all samples, to keep things simple.

```{r}
efa_us_ad_4 <- fa(d_us_ad_pilot, nfactors = 4, rotate = "oblimin")
efa_gh_ad_4 <- fa(d_gh_ad, nfactors = 4, rotate = "oblimin")
efa_gh_ch_4 <- fa(d_gh_ch, nfactors = 4, rotate = "oblimin")
efa_th_ad_4 <- fa(d_th_ad, nfactors = 4, rotate = "oblimin")
efa_th_ch_4 <- fa(d_th_ch, nfactors = 4, rotate = "oblimin")
efa_vt_ad_4 <- fa(d_vt_ad, nfactors = 4, rotate = "oblimin")
efa_vt_ch_4 <- fa(d_vt_ch, nfactors = 4, rotate = "oblimin")
```

```{r}
plot_us_ad_4 <- heatmap_fun(efa_us_ad_4) + 
  guides(fill = "none") + 
  labs(x = "US: adults") +
  theme(axis.title.x = element_text(hjust = 0))

plot_gh_ad_4 <- heatmap_fun(efa_gh_ad_4) + 
  guides(fill = "none") + 
  labs(x = "Ghana: adults") +
  theme(axis.title.x = element_text(hjust = 0))

plot_gh_ch_4 <- heatmap_fun(efa_gh_ch_4) + 
  guides(fill = "none") + 
  labs(x = "Ghana: children") +
  theme(axis.title.x = element_text(hjust = 0))

plot_th_ad_4 <- heatmap_fun(efa_th_ad_4) + 
  guides(fill = "none") + 
  labs(x = "Thailand: adults") +
  theme(axis.title.x = element_text(hjust = 0))

plot_th_ch_4 <- heatmap_fun(efa_th_ch_4) + 
  guides(fill = "none") + 
  labs(x = "Thailand: children") +
  theme(axis.title.x = element_text(hjust = 0))

plot_vt_ad_4 <- heatmap_fun(efa_vt_ad_4) + 
  guides(fill = "none") + 
  labs(x = "Vanuatu: adults") +
  theme(axis.title.x = element_text(hjust = 0))

plot_vt_ch_4 <- heatmap_fun(efa_vt_ch_4) + 
  guides(fill = "none") + 
  labs(x = "Vanuatu: children") +
  theme(axis.title.x = element_text(hjust = 0))
```

```{r, fig.asp = 1.5}
plot_grid(#plot_us_ad_4, 
          plot_gh_ad_4, plot_gh_ch_4, plot_th_ad_4, 
          plot_th_ch_4, plot_vt_ad_4, plot_vt_ch_4, 
          ncol = 2, labels = c("A", "B", "C", "D", "E", "F", "G"))
```

# Counts

```{r}
paste("US adults:", nrow(d_us_ad_pilot))
paste("GH adults:", nrow(d_gh_ad))
paste("GH children:", nrow(d_gh_ch))
paste("TH adults:", nrow(d_th_ad))
paste("TH children:", nrow(d_th_ch))
paste("VT adults:", nrow(d_vt_ad))
paste("VT children:", nrow(d_vt_ch))

paste("Non-US:", sum(nrow(d_gh_ad), nrow(d_gh_ch),
                     nrow(d_th_ad), nrow(d_th_ch),
                     nrow(d_vt_ad), nrow(d_vt_ch)))
```

